% Conference paper from the 29th SIBGRAPI (2016) on classifying life-event
% posts in social media. Fields beyond the standard inproceedings set
% (affiliation, conference-location, conference-year, ibi, targetfile,
% urlaccessdate) are repository metadata and are ignored by standard styles.
% NOTE(review): booktitle was reconstructed from the organization field,
% replacing the truncated placeholder "Proceedings..."; confirm the exact
% wording against the published proceedings.
@InProceedings{CavalinDornCruz:2016:ClLiEv,
  author              = {Cavalin, Paulo and Dornelas, Fillipe and Cruz, Sergio},
  affiliation         = {{IBM Research} and
                         {IBM Research, Universidade Federal Rural do Rio de Janeiro} and
                         {Universidade Federal Rural do Rio de Janeiro}},
  title               = {Classification of Life Events on Social Media},
  booktitle           = {Proceedings of the 29th Conference on Graphics,
                         Patterns and Images ({SIBGRAPI})},
  year                = {2016},
  month               = oct,
  editor              = {Aliaga, Daniel G. and Davis, Larry S. and
                         Farias, Ricardo C. and Fernandes, Leandro A. F. and
                         Gibson, Stuart J. and Giraldi, Gilson A. and
                         Gois, Jo{\~a}o Paulo and Maciel, Anderson and
                         Menotti, David and Miranda, Paulo A. V. and
                         Musse, Soraia and Namikawa, Laercio and
                         Pamplona, Mauricio and Papa, Jo{\~a}o Paulo and
                         dos Santos, Jefersson and
                         Schwartz, William Robson and Thomaz, Carlos E.},
  organization        = {Conference on Graphics, Patterns and Images, 29. (SIBGRAPI)},
  publisher           = {Sociedade Brasileira de Computa{\c{c}}{\~a}o},
  address             = {Porto Alegre},
  keywords            = {Social Media, Life Events, Classification, Unbalanced datasets},
  abstract            = {In this paper we present an investigation of life event
                         classification on social media networks. Detecting personal
                         mentions about life events, such as travel, birthday, wedding,
                         etc, presents an interesting opportunity to anticipate the offer
                         of products or services, as well to enhance the demographics of a
                         given target population. Nevertheless, life event classification
                         can be seen as an unbalanced classification problem, where the set
                         of posts that actually mention a life event is significantly
                         smaller than those that do not. For this reason, the main goal of
                         this paper is to investigate different types of classifiers, on a
                         experimental protocol based on datasets containing various types
                         of life events in both Portuguese and English languages, and the
                         benefits of over-sampling techniques to improve the accuracy of
                         these classifiers on these sets. The results demonstrate that a
                         Logistic Regression may be a poor choice to deal with the original
                         datasets, but after over-sampling the training set, such
                         classifier is able to outperform by a significant margin other
                         classifiers such as Naive Bayes and Nearest Neighbours, which do
                         not benefit as well from the over-sampled training set in most
                         cases.},
  conference-location = {S{\~a}o Jos{\'e} dos Campos, SP, Brazil},
  conference-year     = {4-7 Oct. 2016},
  language            = {en},
  ibi                 = {8JMKD3MGPAW/3MC59RH},
  url                 = {http://urlib.net/ibi/8JMKD3MGPAW/3MC59RH},
  targetfile          = {SibgrapiWIA_LifeEvents_2016_cameraready.pdf},
  urlaccessdate       = {2024, Apr. 28}
}